This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
rm(list = ls(all=TRUE)) # clear global environment
library(tidyr) # data loading
library(dplyr) #dataframe manipulation
library(stringr) # string manipulation
# library(lubridate, warn.conflicts = FALSE) # work with date formats
library(ggplot2) # plotting
options(scipen = 999) # number formatting
path <- file.path("C:","Users","Paulina-laptop","Desktop","RRR")
setwd(path)
sprintf("Current directory: %s", getwd())
## [1] "Current directory: C:/Users/Paulina-laptop/Desktop/RRR"
Let’s load some data first
wells <- read.csv(unzip("BCOGC_data/prod_csv.zip", "zone_prd_2016_to_present.csv"), skip = 1)
head(wells)
## Wa_num Compltn_event_seq Prod_period UWI Area_code Formtn_code
## 1 29 0 201601 100142108318W600 3600 6200
## 2 29 0 201603 100142108318W600 3600 6200
## 3 29 0 201611 100142108318W600 3600 6200
## 4 29 0 201612 100142108318W600 3600 6200
## 5 29 0 201701 100142108318W600 3600 6200
## 6 29 0 201702 100142108318W600 3600 6200
## Pool_seq Gas_prod_vol..e3m3. Oil_prod_vol..m3. Water_prod_vol..m3.
## 1 A 107.7 0 2.3
## 2 A 45.2 0 1.7
## 3 A 166.2 0 5.0
## 4 A 159.2 0 2.6
## 5 A 133.5 0 3.7
## 6 A 95.8 0 4.1
## Cond_prod_vol..m3. Prod_days. Gas_prod_cum..e3m3. Oil_prod_cum..m3.
## 1 0.5 31.0 107.7 0
## 2 0.3 13.0 152.9 0
## 3 1.3 22.0 319.1 0
## 4 0.5 31.0 478.3 0
## 5 0.6 31.0 611.8 0
## 6 0.5 27.8 707.6 0
## Water_prod_cum..m3. Cond_prod_cum..m3. Project_code
## 1 2.3 0.5 0
## 2 4.0 0.8 0
## 3 9.0 2.1 0
## 4 11.6 2.6 0
## 5 15.3 3.2 0
## 6 19.4 3.7 0
Column names in the original file have many strange interpunction, which could be easily fixed:
names(wells) <- str_replace_all(names(wells), c(" " = "" ,
"," = "",
"\\("="_",
"\\)"="",
"\\.."="_",
"\\."=""
))
names(wells)
## [1] "Wa_num" "Compltn_event_seq" "Prod_period"
## [4] "UWI" "Area_code" "Formtn_code"
## [7] "Pool_seq" "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"
## [10] "Water_prod_vol_m3" "Cond_prod_vol_m3" "Prod_days"
## [13] "Gas_prod_cum_e3m3" "Oil_prod_cum_m3" "Water_prod_cum_m3"
## [16] "Cond_prod_cum_m3" "Project_code"
wells <- subset(wells, select=c(Wa_num, Prod_period, Prod_days, Area_code, Formtn_code, Gas_prod_vol_e3m3, Oil_prod_vol_m3, Water_prod_vol_m3, Cond_prod_vol_m3)) # select only useful columns
area_codes <- read.table("BCOGC_data/ogc_area_codes.csv",
sep = ",", skip = 1, header = TRUE, stringsAsFactors = FALSE)
head(area_codes,3)
## Area.Code Area.Name
## 1 50 ADSETT
## 2 100 AIRPORT
## 3 200 AITKEN CREEK
area_codes <- rename(area_codes, Area_code = Area.Code, Area_name = Area.Name)
formation_codes <- read.table("BCOGC_data/ogc_formation_codes.csv",
sep = ",", skip = 1, header = TRUE, stringsAsFactors = FALSE)
head(formation_codes,3)
## Formation.Code Formation.Name
## 1 4610 A MARKER/BASE OF LIME
## 2 2703 AITKEN CREEK
## 3 9922 ALDRIDGE
formation_codes <- rename(formation_codes, Formtn_code = Formation.Code, Formtn_name = Formation.Name)
Merge dataframes to create 2 additional columns with area and name as the wells file contains only its numerical codes.
wells <- merge(wells, area_codes, by = 'Area_code')
wells <- merge(wells, formation_codes, by = 'Formtn_code')
# we don't need them anymore in memory and dataframe
rm(area_codes, formation_codes) # remove form memory
wells <- subset(wells, select = -c(Area_code, Formtn_code)) # remove from dataframe
First glimpse on the data. Notice newly created columns, Area_name and Formtn_name at the end.
names(wells)
## [1] "Wa_num" "Prod_period" "Prod_days"
## [4] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3" "Water_prod_vol_m3"
## [7] "Cond_prod_vol_m3" "Area_name" "Formtn_name"
str(wells)
## 'data.frame': 517518 obs. of 9 variables:
## $ Wa_num : int 14893 25370 25370 26962 14893 14893 25371 25371 25371 26962 ...
## $ Prod_period : int 201809 202003 201901 201709 201805 202001 202011 201912 201805 201907 ...
## $ Prod_days : num 30 30.7 30.8 23 31 31 28.5 30.4 30.4 28 ...
## $ Gas_prod_vol_e3m3: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Oil_prod_vol_m3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Water_prod_vol_m3: num 91.4 1291 2528 1068 6948.7 ...
## $ Cond_prod_vol_m3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Area_name : chr "DESAN" "PEEJAY WEST" "PEEJAY WEST" "BEAVERTAIL" ...
## $ Formtn_name : chr "QUATERNARY" "QUATERNARY" "QUATERNARY" "QUATERNARY" ...
summary(wells)
## Wa_num Prod_period Prod_days Gas_prod_vol_e3m3
## Min. : 29 Min. :201601 Min. : 0.00 Min. : 0.0
## 1st Qu.:15183 1st Qu.:201704 1st Qu.:25.60 1st Qu.: 38.2
## Median :22328 Median :201807 Median :30.00 Median : 124.8
## Mean :21074 Mean :201810 Mean :26.08 Mean : 555.4
## 3rd Qu.:28257 3rd Qu.:201910 3rd Qu.:31.00 3rd Qu.: 608.5
## Max. :41055 Max. :202101 Max. :31.10 Max. :119042.3
## Oil_prod_vol_m3 Water_prod_vol_m3 Cond_prod_vol_m3 Area_name
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Length:517518
## 1st Qu.: 0.00 1st Qu.: 0.9 1st Qu.: 0.00 Class :character
## Median : 0.00 Median : 5.0 Median : 0.00 Mode :character
## Mean : 10.77 Mean : 181.4 Mean : 19.31
## 3rd Qu.: 0.00 3rd Qu.: 52.6 3rd Qu.: 1.00
## Max. :7089.30 Max. :29177.7 Max. :7424.00
## Formtn_name
## Length:517518
## Class :character
## Mode :character
##
##
##
length(unique(wells$Formtn_name)) # number of formations
## [1] 92
unique(wells$Formtn_name)
## [1] "QUATERNARY" "CRETACEOUS"
## [3] "CARDIUM SAND" "DOE CREEK"
## [5] "DUNVEGAN" "BASE OF FISH SCALES"
## [7] "SCATTER" "PADDY"
## [9] "PADDY- CADOTTE" "CADOTTE"
## [11] "SPIRIT RIVER" "NOTIKEWIN"
## [13] "FALHER" "FALHER A"
## [15] "FALHER B" "FALHER C"
## [17] "FALHER D" "WILRICH"
## [19] "BLUESKY" "BASAL BLUESKY"
## [21] "BLUESKY-GETHING" "BLUESKY-GETHING-DETRITAL"
## [23] "DETRITAL" "GETHING"
## [25] "LOWER GETHING" "BASAL GETHING"
## [27] "GETHING-BALDONNEL" "CADOMIN"
## [29] "CHINKEH" "NIKANASSIN"
## [31] "DUNLEVY" "LOWER DUNLEVY"
## [33] "ROCK CREEK" "NORDEGG"
## [35] "NORDEGG-BALDONNEL" "PARDONET-BALDONNEL"
## [37] "BALDONNEL" "BALDONNEL/UPPER CHARLIE LAKE"
## [39] "CHARLIE LAKE" "SIPHON"
## [41] "CECIL" "NANCY"
## [43] "FLATROCK" "BOUNDARY LAKE"
## [45] "YELLOW MARKER" "COPLIN"
## [47] "MICA" "BLUEBERRY"
## [49] "INGA" "NORTH PINE"
## [51] "BEAR FLAT" "PINGEL"
## [53] "LIMESTONE A BED" "A MARKER/BASE OF LIME"
## [55] "LOWER CHARLIE LAKE SANDS" "ARTEX"
## [57] "ARTEX/HALFWAY" "UPPER HALFWAY"
## [59] "HALFWAY" "LOWER HALFWAY"
## [61] "DOIG" "DOIG PHOSPHATE BEDS"
## [63] "BLUESKY-GETHING-MONTNEY" "LOWER CHARLIE LAKE/MONTNEY"
## [65] "DOIG PHOSPHATE-MONTNEY" "MONTNEY"
## [67] "BELLOY" "BELCOURT"
## [69] "BELCOURT-TAYLOR FLAT" "BELLOY-KISKATINAW"
## [71] "TAYLOR FLAT" "MISSISSIPPIAN"
## [73] "KISKATINAW" "LOWER KISKATINAW"
## [75] "BASAL KISKATINAW" "UPPER DEBOLT"
## [77] "DEBOLT" "ELKTON"
## [79] "SHUNDA" "PEKISKO"
## [81] "BANFF" "BESA RIVER"
## [83] "WABAMUN" "TETCHO"
## [85] "KAKISA" "JEAN MARIE"
## [87] "MUSKWA" "MUSKWA-OTTER PARK"
## [89] "SLAVE POINT" "SULPHUR POINT"
## [91] "EVIE" "PINE POINT"
slave_pnt <- filter(wells, Formtn_name == "SLAVE POINT" )
unique(slave_pnt$Formtn_name)
## [1] "SLAVE POINT"
muskwa <- filter(wells, Formtn_name %in% c("MUSKWA", "MUSKWA-OTTER PARK"))
unique(muskwa$Formtn_name)
## [1] "MUSKWA" "MUSKWA-OTTER PARK"
# same as
muskwa <- filter(wells, Formtn_name == "MUSKWA" | Formtn_name == "MUSKWA-OTTER PARK")
unique(muskwa$Formtn_name)
## [1] "MUSKWA" "MUSKWA-OTTER PARK"
Wa_num_sorted <- arrange(wells, Prod_period) # ascending
head(Wa_num_sorted)
## Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1 14893 201601 31 0.0 0
## 2 25371 201601 31 0.0 0
## 3 26962 201601 31 0.0 0
## 4 25370 201601 31 0.0 0
## 5 17824 201601 31 0.1 0
## 6 21469 201601 31 25.5 0
## Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1 4242.0 0 DESAN QUATERNARY
## 2 4823.0 0 PEEJAY WEST QUATERNARY
## 3 1368.0 0 BEAVERTAIL QUATERNARY
## 4 2233.0 0 PEEJAY WEST QUATERNARY
## 5 0.0 0 OJAY CRETACEOUS
## 6 0.6 0 HIDING CREEK CRETACEOUS
Wa_num_sorted <- arrange(wells, desc(Prod_period)) # descending
head(Wa_num_sorted)
## Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1 25370 202101 31 0.0 0
## 2 17557 202101 3 0.0 0
## 3 23561 202101 31 58.2 0
## 4 16585 202101 31 77.1 0
## 5 14164 202101 31 32.0 0
## 6 18101 202101 31 108.5 0
## Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1 798.0 0 PEEJAY WEST QUATERNARY
## 2 27.6 0 HELMET QUATERNARY
## 3 0.7 0 OJAY CRETACEOUS
## 4 1.1 0 OJAY CRETACEOUS
## 5 0.4 0 OJAY CRETACEOUS
## 6 2.2 0 OJAY CRETACEOUS
Wa_num_sorted <- arrange(wells, desc(Prod_period), Wa_num) # by multiple conditions
head(Wa_num_sorted)
## Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1 101 202101 28.5 6.5 47.8
## 2 131 202101 31.0 10.6 0.0
## 3 152 202101 31.0 4.8 17.2
## 4 153 202101 30.7 1829.3 0.0
## 5 180 202101 0.5 1.7 0.0
## 6 244 202101 13.1 16.8 0.0
## Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1 1148.1 0.0 BOUNDARY LAKE BOUNDARY LAKE
## 2 1.1 0.2 NIG CREEK BALDONNEL
## 3 487.6 0.0 BOUNDARY LAKE BOUNDARY LAKE
## 4 32.7 0.0 PARKLAND WABAMUN
## 5 0.0 0.0 BEG HALFWAY
## 6 0.0 0.3 STODDART BELLOY
rm(Wa_num_sorted)
names(wells)
## [1] "Wa_num" "Prod_period" "Prod_days"
## [4] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3" "Water_prod_vol_m3"
## [7] "Cond_prod_vol_m3" "Area_name" "Formtn_name"
head(select(wells, Area_name, Formtn_name))
## Area_name Formtn_name
## 1 DESAN QUATERNARY
## 2 PEEJAY WEST QUATERNARY
## 3 PEEJAY WEST QUATERNARY
## 4 BEAVERTAIL QUATERNARY
## 5 DESAN QUATERNARY
## 6 DESAN QUATERNARY
head(select(wells, Gas_prod_vol_e3m3:Cond_prod_vol_m3, Area_name, Formtn_name))
## Gas_prod_vol_e3m3 Oil_prod_vol_m3 Water_prod_vol_m3 Cond_prod_vol_m3
## 1 0 0 91.4 0
## 2 0 0 1291.0 0
## 3 0 0 2528.0 0
## 4 0 0 1068.0 0
## 5 0 0 6948.7 0
## 6 0 0 109.3 0
## Area_name Formtn_name
## 1 DESAN QUATERNARY
## 2 PEEJAY WEST QUATERNARY
## 3 PEEJAY WEST QUATERNARY
## 4 BEAVERTAIL QUATERNARY
## 5 DESAN QUATERNARY
## 6 DESAN QUATERNARY
The same wells have been listed multiple times as there are various production periods each year. Let’s create new column Prod_year to make more general summary.
head(wells$Prod_period)
## [1] 201809 202003 201901 201709 201805 202001
library(stringr)
wells <- wells %>%
mutate(Prod_year = substr(Prod_period, 1, 4),
Prod_month = substr(Prod_period, 5, 6),
# Prod_ym = as.factor(paste(Prod_year, Prod_month))) #to create time "points" only from available dates
Prod_ym = paste(Prod_year, Prod_month, sep="-")) #to create time "points" only from available dates
head(wells)
## Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1 14893 201809 30.0 0 0
## 2 25370 202003 30.7 0 0
## 3 25370 201901 30.8 0 0
## 4 26962 201709 23.0 0 0
## 5 14893 201805 31.0 0 0
## 6 14893 202001 31.0 0 0
## Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name Prod_year
## 1 91.4 0 DESAN QUATERNARY 2018
## 2 1291.0 0 PEEJAY WEST QUATERNARY 2020
## 3 2528.0 0 PEEJAY WEST QUATERNARY 2019
## 4 1068.0 0 BEAVERTAIL QUATERNARY 2017
## 5 6948.7 0 DESAN QUATERNARY 2018
## 6 109.3 0 DESAN QUATERNARY 2020
## Prod_month Prod_ym
## 1 09 2018-09
## 2 03 2020-03
## 3 01 2019-01
## 4 09 2017-09
## 5 05 2018-05
## 6 01 2020-01
class(wells$Prod_ym)
## [1] "character"
head(wells$Prod_period)
## [1] 201809 202003 201901 201709 201805 202001
prod_days_by_area <- wells %>%
group_by(Area_name) %>%
summarize(
total_prod_days = sum(Prod_days)
)
head(prod_days_by_area)
## # A tibble: 6 x 2
## Area_name total_prod_days
## <chr> <dbl>
## 1 ADSETT 6385.
## 2 AIRPORT 141.
## 3 AITKEN CREEK 4164.
## 4 AITKEN CREEK NORTH 1885.
## 5 ALTARES 14861.
## 6 BEAVERDAM 6439.
prod_days_by_area %>% filter(total_prod_days > 500000) %>%
ggplot(aes(x = Area_name, y = total_prod_days)) +
geom_col() +
ggtitle("Total Production Days for most productive areas", ) +
ylab("Production days") +
xlab("Area")
## histograms
wells %>% filter(Cond_prod_vol_m3 > 0) %>%
ggplot() +
geom_histogram(mapping = aes(Cond_prod_vol_m3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
wells %>% filter(Cond_prod_vol_m3 > 100, Cond_prod_vol_m3 < 1000) %>%
ggplot() +
geom_histogram(mapping = aes(Cond_prod_vol_m3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
wells %>% filter(Cond_prod_vol_m3 > 100, Cond_prod_vol_m3 < 1000) %>%
ggplot() +
geom_histogram(mapping = aes(Cond_prod_vol_m3), bins=200) +
ggtitle("Distribution of condensate production per production period") +
xlab(expression("Condensate production per production period m"^3))
Data analysis We can check general trends in production by formation and area. Lets start analyzing by formation ## tables
# Using tables we can see how many wells we have for each formation
wells %>% group_by(Formtn_name) %>%
summarize(
count_wells = n_distinct(Wa_num),#count unique well numbers (multiple prod. periods)
mean_wtr_prod = mean(Water_prod_vol_m3)
) %>% arrange(desc(count_wells))
## # A tibble: 92 x 3
## Formtn_name count_wells mean_wtr_prod
## <chr> <int> <dbl>
## 1 MONTNEY 4745 150.
## 2 JEAN MARIE 1531 3.21
## 3 HALFWAY 659 62.2
## 4 BLUESKY 589 2038.
## 5 CADOMIN 535 10.7
## 6 NOTIKEWIN 406 5.03
## 7 BALDONNEL 356 62.3
## 8 BLUESKY-GETHING-MONTNEY 354 1.34
## 9 GETHING 330 2.33
## 10 BOUNDARY LAKE 278 483.
## # ... with 82 more rows
# we can find which areas / formations suspected to have more water disposal wells?
wells %>% group_by(Area_name) %>%
summarize(
count_wells = n_distinct(Wa_num),
mean_wtr_prod = mean(Water_prod_vol_m3)
) %>% arrange(desc(count_wells))
## # A tibble: 169 x 3
## Area_name count_wells mean_wtr_prod
## <chr> <int> <dbl>
## 1 HERITAGE 2921 155.
## 2 NORTHERN MONTNEY 1797 145.
## 3 HELMET 549 15.6
## 4 DEEP BASIN 528 2.91
## 5 GUNNELL CREEK 471 2.46
## 6 BOUNDARY LAKE 313 431.
## 7 RING 213 1.41
## 8 HAY RIVER 211 4481.
## 9 SIERRA 207 27.3
## 10 HORN RIVER 200 97.5
## # ... with 159 more rows
We can make our data in the long format and have all types of hydrocarbon production in one column using gather function.
alt text here
wells %>% select(Water_prod_vol_m3, Gas_prod_vol_e3m3, Oil_prod_vol_m3, Cond_prod_vol_m3) %>%
head(3)
## Water_prod_vol_m3 Gas_prod_vol_e3m3 Oil_prod_vol_m3 Cond_prod_vol_m3
## 1 91.4 0 0 0
## 2 1291.0 0 0 0
## 3 2528.0 0 0 0
prod_df <- wells %>%
gather(key = "Prod_type",
value = "Prod_volume",
c(Gas_prod_vol_e3m3, Oil_prod_vol_m3, Cond_prod_vol_m3)) %>%
select(Water_prod_vol_m3, Prod_volume, Prod_year, Prod_ym, Prod_type, Prod_days, Formtn_name)
prod_df %>% select(Water_prod_vol_m3, Prod_type) %>% head(3)
## Water_prod_vol_m3 Prod_type
## 1 91.4 Gas_prod_vol_e3m3
## 2 1291.0 Gas_prod_vol_e3m3
## 3 2528.0 Gas_prod_vol_e3m3
unique(prod_df$Prod_type)
## [1] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3" "Cond_prod_vol_m3"
prod_df %>%
filter(Prod_year == 2021) %>%
ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
geom_point()
# limit axis
prod_df %>%
filter(Prod_year == 2021) %>%
ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
geom_point() +
xlim(0,10000) +
ylim(0,25000)
## Warning: Removed 45 rows containing missing values (geom_point).
prod_df %>%
filter(Prod_year == 2021) %>%
ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
geom_point() +
facet_wrap(~Prod_type)
prod_df %>%
filter(Prod_year == 2021) %>%
ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
geom_point() +
facet_wrap(~Prod_type, scales = "free") # scales free => not fixed limits of yaxis
# I expect that Prod_volume > 0 will indicate the SWD disposal wells, let's remove those from the plot
prod_df %>%
filter(Prod_year == 2021, Prod_volume > 0) %>% # Prod_colume > 0 to remove disposal wells
ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
geom_point() +
ggtitle("Water production per HC type") +
facet_wrap(~Prod_type, scales = "free") +
xlim(0,5000)
## Warning: Removed 126 rows containing missing values (geom_point).
wells %>% subset(Prod_year > 2000) %>%
group_by(Prod_ym) %>%
summarise(
prod_per_period = sum(Cond_prod_vol_m3)
) %>%
ggplot(aes(x = Prod_ym, y = prod_per_period)) +
theme(axis.text.x = element_text(angle = 90)) +
geom_col()
wells %>%
subset(Prod_year > 2000) %>%
group_by(Prod_year) %>%
summarise(
prod_per_year = sum(Cond_prod_vol_m3)
) %>%
ggplot(aes(x = Prod_year, y = prod_per_year)) +
geom_col()
class(wells$Prod_period)
## [1] "integer"
unique(wells$Formtn_name)[1:20]
## [1] "QUATERNARY" "CRETACEOUS" "CARDIUM SAND"
## [4] "DOE CREEK" "DUNVEGAN" "BASE OF FISH SCALES"
## [7] "SCATTER" "PADDY" "PADDY- CADOTTE"
## [10] "CADOTTE" "SPIRIT RIVER" "NOTIKEWIN"
## [13] "FALHER" "FALHER A" "FALHER B"
## [16] "FALHER C" "FALHER D" "WILRICH"
## [19] "BLUESKY" "BASAL BLUESKY"
prod_df %>%
filter(Formtn_name == "BOUNDARY LAKE") %>%
group_by(Prod_ym, Formtn_name, Prod_type) %>%
summarise(
total_prod = sum(Prod_volume)
) %>%
ungroup() %>%
ggplot(aes(x = Prod_ym, y = total_prod, group=Formtn_name, col=Prod_type)) +
geom_line() +
scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01",
"2019-01","2020-01", "2021-01")) +
facet_grid(Prod_type ~ ., scales = "free")
## `summarise()` has grouped output by 'Prod_ym', 'Formtn_name'. You can override using the `.groups` argument.
# all formations on one chart
prod_df %>%
group_by(Prod_ym, Prod_type) %>%
summarise(
total_prod = sum(Prod_volume)
) %>%
ungroup() %>%
ggplot(aes(x = Prod_ym, y = total_prod, group=1, col=Prod_type)) +
geom_line() +
scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01",
"2019-01","2020-01", "2021-01")) +
facet_grid(Prod_type ~ ., scales = "free")
## `summarise()` has grouped output by 'Prod_ym'. You can override using the `.groups` argument.
We can apply simple trick to order the formations from the highest production to the lowest (using mutate function).
We save most productive formations separately, they will be useful later to plot production in time.
prod_by_formation <- wells %>%
group_by(Formtn_name) %>%
summarise(
oil_prod_total = sum(Oil_prod_vol_m3),
gas_prod_total = sum(Gas_prod_vol_e3m3),
cond_prod_total = sum(Cond_prod_vol_m3),
water_prod_total = sum(Water_prod_vol_m3)
) %>%
ungroup()
most_gas <- prod_by_formation %>%
arrange(desc(gas_prod_total)) %>%
head(5)
ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
geom_col(fill='#ff6347') # change color to red
# trick using mutate to order the bars according to production value
most_gas <- most_gas %>%
mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))
ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
geom_col(fill='#ff6347') # change color to red
# same for oil production
most_oil <- prod_by_formation %>%
arrange(desc(oil_prod_total)) %>%
head(5) %>%
mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))
ggplot(most_oil, aes(x = Formtn_name, y = oil_prod_total)) +
geom_col()
# and for condensate production
most_cond <- prod_by_formation %>%
arrange(desc(cond_prod_total)) %>%
head(5) %>%
mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))
ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
geom_col(fill="#6495ed")
We can also visualize the same set of formations. To define the set, we will take 5 most productive gas, oil and condensate formations.
# define 5 most productive formations
best_formations <- unique(most_oil$Formtn_name,
most_gas$Formtn_name,
most_cond$Formtn_name)
wells %>%
subset(Formtn_name %in% best_formations) %>%
ggplot(aes(x = Formtn_name, y = Oil_prod_vol_m3, fill=Formtn_name)) +
geom_bar(stat = "summary", fun = "sum") +
ggtitle("Total oil production by formation") +
xlab("Formation") +
ylab("Oil production [m3]")
prod_by_formation %>%
subset(Formtn_name %in% best_formations) %>%
ggplot(aes(x = Formtn_name, y = gas_prod_total, fill=Formtn_name)) +
geom_bar(stat = "summary", fun = "sum") +
ggtitle("Total gas production by formation") +
xlab("Formation") +
ylab("Gas production [e3m3]")
prod_by_formation %>%
subset(Formtn_name %in% best_formations) %>%
ggplot(aes(x = Formtn_name, y = cond_prod_total, fill=Formtn_name)) +
geom_bar(stat = "summary", fun = "sum") +
ggtitle("Total condensate production by formation") +
xlab("Formation") +
ylab("Condensate production [m3]")
prod_by_formation %>%
subset(Formtn_name %in% best_formations) %>%
ggplot(aes(x = Formtn_name, y = water_prod_total, fill=Formtn_name)) +
geom_bar(stat = "summary", fun = "sum") +
ggtitle("Total water production by formation") +
xlab("Formation") +
ylab("Water production [m3]")
#library(tidyverse)
ggplot(data = wells, aes(x = Prod_year)) + geom_bar()
# FILTER FROMATIONS WITH MOST WELLS
ggplot(filter(wells, Formtn_name %in% best_formations & Prod_year < 2021)) +
geom_bar(aes(x = Formtn_name, fill=Formtn_name)) +
scale_fill_manual(values = c('#d0d1e6','#a6bddb','#67a9cf','#1c9099','#016c59')) + # taken e.g. from colorbrewer!
facet_wrap(~ Prod_year) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank())
If we want to compare oil and gas production in one plot its useful to produce the formation of the dataset (https://www.joyofdata.de/blog/wp-content/uploads/2012/11/Clipboard16.png)
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
head(prod_df)
## Water_prod_vol_m3 Prod_volume Prod_year Prod_ym Prod_type Prod_days
## 1 91.4 0 2018 2018-09 Gas_prod_vol_e3m3 30.0
## 2 1291.0 0 2020 2020-03 Gas_prod_vol_e3m3 30.7
## 3 2528.0 0 2019 2019-01 Gas_prod_vol_e3m3 30.8
## 4 1068.0 0 2017 2017-09 Gas_prod_vol_e3m3 23.0
## 5 6948.7 0 2018 2018-05 Gas_prod_vol_e3m3 31.0
## 6 109.3 0 2020 2020-01 Gas_prod_vol_e3m3 31.0
## Formtn_name
## 1 QUATERNARY
## 2 QUATERNARY
## 3 QUATERNARY
## 4 QUATERNARY
## 5 QUATERNARY
## 6 QUATERNARY
prod_per_period <- prod_df %>%
filter(Formtn_name %in% best_formations) %>%
group_by(Prod_ym, Prod_type, Formtn_name) %>%
summarise(
prod_per_period = sum(Prod_volume)
)
## `summarise()` has grouped output by 'Prod_ym', 'Prod_type'. You can override using the `.groups` argument.
p <- ggplot(prod_per_period, aes(x=Prod_ym, y=prod_per_period, group=Formtn_name, fill=Formtn_name)) +
geom_area(alpha=0.3) +
scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01",
"2019-01","2020-01", "2021-01")) +
facet_grid(Prod_type ~ ., scales = "free")
p <- ggplotly(p, tooltip = "text")
p
p <- ggplot(prod_per_period, aes(x=Prod_ym, y=prod_per_period, group=Prod_type,
text = paste("Formation:", Formtn_name,
"<br>Prod. type:", Prod_type), fill=Prod_type)) +
geom_area(colour="#636363", alpha=0.3) +
scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01",
"2019-01","2020-01", "2021-01")) +
facet_wrap(Formtn_name ~ ., scales = "free") +
ylab("") +
xlab("")
p <- ggplotly(p, tooltip = "text")
p
# load well information (coordinates)
# load shapefiles with areas
library(sf)
## Warning: package 'sf' was built under R version 4.0.4
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
# load dplyr and ggplot2 in case this is the only chunk you want to run
library(dplyr)
library(ggplot2)
wells_coords <- read.csv(unzip("BCOGC_data/drill_csv.zip", "wells.csv"), skip = 1)
head(wells_coords)#bigger file
## Well.Surf.Loc Well.Name
## 1 D- 080-H/092-G-03 ROYAL CITY NO. 1 D- 080-H/092-G-03
## 2 B- 050-D/082-G-01 CANADIAN KOOTENAY NO. 1 B- 050-D/082-G-01
## 3 D- 055-A/082-G-02 BORDER OILS NO. 1 D- 055-A/082-G-02
## 4 13-11-081-17 CANLIN PINGEL 13-11-081-17
## 5 A- 065-E/093-I-16 CVE ENERGY ET AL LONE MOUNTAIN NO. 1 A- 065-E/093-I-16
## 6 C- 048-D/103-G-05 STRATHCONA QUEEN CHARLOTTE NO.1 C- 048-D/103-G-05
## WA.Num Surf.Nad27.Lat Surf.Nad27.Long Surf.Nad83.Lat Surf.Nad83.Long
## 1 1 49084860 123064913 49085314 123065320
## 2 2 49020730 114294872 49035335 114497851
## 3 3 49025250 114331128 49047892 114554121
## 4 4 NA NA 56004511 120331605
## 5 5 54530889 120254600 54530863 120255126
## 6 6 53172250 131590333 53171412 131575692
## Surf.UTM.Zone.Num Surf.UTM83.Northng Surf.UTM83.Eastng Surf.North Surf.East
## 1 10 5443925 491629.8 -192.0 -71.3
## 2 11 5434400 682875.8 NA NA
## 3 11 5435662 678718.6 NA NA
## 4 10 6210173 652456.4 -201.2 221.3
## 5 10 6085099 664789.3 274.9 -285.3
## 6 9 5908329 302312.1 -463.9 -107.9
## Surf.Owner Ground.Elevtn Directional.Flag Surf.Ref.Corner Surf.Ref.Unit
## 1 CROWN 1.5 N NE 80
## 2 CROWN NA N NW 50
## 3 CROWN NA N SW 55
## 4 CROWN 758.6 N NW NA
## 5 CROWN 971.6 N SE 65
## 6 CROWN 8.3 N NE 48
## Surf.Ref.Block Surf.Ref.Map Surf.Ref.Lsd Surf.Ref.Sect Surf.Ref.Twp
## 1 H 092-G-03 NA NA NA
## 2 D 082-G-01 NA NA NA
## 3 A 082-G-02 NA NA NA
## 4 13 11 81
## 5 E 093-I-16 NA NA NA
## 6 D 103-G-05 NA NA NA
## Surf.Ref.Range Surf.DLS.Exception Surf.Lsd Surf.Sect Surf.Twp Surf.Range
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 17 13 11 81 17
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## Surf.Qtr.Unit Surf.NTS.Exception Surf.Unit Surf.Block Surf.Map Oper.Id
## 1 D 80 H 092-G-03 0
## 2 B 50 D 082-G-01 0
## 3 D 55 A 082-G-02 0
## 4 NA 1018
## 5 A 65 E 093-I-16 1016
## 6 C 48 D 103-G-05 11075
## Oper.Abbrev Oper.Abbrev2 Optnl.Unit Well.Area.Name Well.Name.Date
## 1 ROYAL CITY NO. 1 19480610
## 2 CANADIAN KOOTENAY NO. 1 19490903
## 3 BORDER OILS NO. 1 19480704
## 4 CANLIN PINGEL 20171206
## 5 CVE ENERGY ET AL LONE MOUNTAIN NO. 1 20180607
## 6 STRATHCONA QUEEN CHARLOTTE NO.1 20200901
## Special.Well.Class.Code Test.Hole
## 1 NO N
## 2 NO N
## 3 N
## 4 NO N
## 5 NO N
## 6 NO N
names(wells_coords)
## [1] "Well.Surf.Loc" "Well.Name"
## [3] "WA.Num" "Surf.Nad27.Lat"
## [5] "Surf.Nad27.Long" "Surf.Nad83.Lat"
## [7] "Surf.Nad83.Long" "Surf.UTM.Zone.Num"
## [9] "Surf.UTM83.Northng" "Surf.UTM83.Eastng"
## [11] "Surf.North" "Surf.East"
## [13] "Surf.Owner" "Ground.Elevtn"
## [15] "Directional.Flag" "Surf.Ref.Corner"
## [17] "Surf.Ref.Unit" "Surf.Ref.Block"
## [19] "Surf.Ref.Map" "Surf.Ref.Lsd"
## [21] "Surf.Ref.Sect" "Surf.Ref.Twp"
## [23] "Surf.Ref.Range" "Surf.DLS.Exception"
## [25] "Surf.Lsd" "Surf.Sect"
## [27] "Surf.Twp" "Surf.Range"
## [29] "Surf.Qtr.Unit" "Surf.NTS.Exception"
## [31] "Surf.Unit" "Surf.Block"
## [33] "Surf.Map" "Oper.Id"
## [35] "Oper.Abbrev" "Oper.Abbrev2"
## [37] "Optnl.Unit" "Well.Area.Name"
## [39] "Well.Name.Date" "Special.Well.Class.Code"
## [41] "Test.Hole"
wells_coords <- wells_coords %>% select(Surf.UTM83.Northng, Surf.UTM83.Eastng, Surf.UTM.Zone.Num, Well.Area.Name, Oper.Abbrev, Surf.Owner)
wells_coords <- wells_coords %>% filter(complete.cases(.), Surf.UTM.Zone.Num %in% c(10,11))
unique(wells_coords$Surf.UTM.Zone.Num)
## [1] 10 11
# create 2 geodataframes with crs values according to UTM zone
wells_pts_1 <- wells_coords %>%
filter(Surf.UTM.Zone.Num == 10) %>%
st_as_sf(coords = c("Surf.UTM83.Eastng", "Surf.UTM83.Northng"), crs = 26910) %>%
st_transform(4326) # want to use latlon coords
wells_pts_2 <- wells_coords %>%
filter(Surf.UTM.Zone.Num == 11) %>%
st_as_sf(coords = c("Surf.UTM83.Eastng", "Surf.UTM83.Northng"), crs = 4326)
wells_pts <- rbind(wells_pts_1, wells_pts_2)
st_crs(wells_pts)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
geoph_lines <- st_read("BCOGC_data/shapefiles/PASR_GEOPHYSICAL_LN.shp")
## Reading layer `PASR_GEOPHYSICAL_LN' from data source `C:\Users\Paulina-laptop\Desktop\RRR\BCOGC_data\shapefiles\PASR_GEOPHYSICAL_LN.shp' using driver `ESRI Shapefile'
## Simple feature collection with 211710 features and 19 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: 820855.6 ymin: 535917.4 xmax: 1885851 ymax: 1672546
## Projected CRS: NAD83 / BC Albers
geoph_lines_dawson <- filter(geoph_lines, PROG_NAME %in% c("DAWSON 3D","DAWSON 3D EXTENSION"))
geoph_lines_dawson <- st_transform(geoph_lines_dawson, st_crs(wells_pts))
dawson_area <- st_union(geoph_lines_dawson) %>%
st_convex_hull()
## although coordinates are longitude/latitude, st_union assumes that they are planar
wells_dawson <- wells_pts %>%
filter(st_intersects(x = ., y = dawson_area, sparse = FALSE))
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
ggplot() +
geom_sf(data = wells_dawson, size=1, alpha=0.2, aes(colour=Surf.Owner)) +
geom_sf(data = geoph_lines_dawson, size=0.1)
names(wells_dawson)
## [1] "Surf.UTM.Zone.Num" "Well.Area.Name" "Oper.Abbrev"
## [4] "Surf.Owner" "geometry"
pools <- st_read("BCOGC_data/shapefiles/POOL_CONTOURS_LN.shp")
## Reading layer `POOL_CONTOURS_LN' from data source `C:\Users\Paulina-laptop\Desktop\RRR\BCOGC_data\shapefiles\POOL_CONTOURS_LN.shp' using driver `ESRI Shapefile'
## Simple feature collection with 7963 features and 12 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: 1091006 ymin: 1070402 xmax: 1386930 ymax: 1679933
## Projected CRS: NAD83 / BC Albers
pools <- st_transform(pools, st_crs(wells_dawson))
pools_dawson <- pools %>%
filter(st_within(x = ., y = dawson_area, sparse = FALSE))
## although coordinates are longitude/latitude, st_within assumes that they are planar
ggplot() +
geom_sf(data = wells_dawson, alpha=0.2) +
geom_sf(data = geoph_lines_dawson, alpha=0.2) +
geom_sf(data = pools_dawson, aes(colour=FLUID_TYPE)) +
scale_color_discrete(name = "Producing pools") +
xlab("Longitude") +
ylab("Latitude")
fields <- st_read("BCOGC_data/shapefiles/FIELDS_PY.shp")
## Reading layer `FIELDS_PY' from data source `C:\Users\Paulina-laptop\Desktop\RRR\BCOGC_data\shapefiles\FIELDS_PY.shp' using driver `ESRI Shapefile'
## Simple feature collection with 570 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1079392 ymin: 1066850 xmax: 1387498 ymax: 1680540
## Projected CRS: NAD83 / BC Albers
ggplot() +
geom_sf(data = fields)
fields <- st_transform(fields, st_crs(wells_dawson))
# let's focus on the fields in the smaller area (crop the layer)
fields <- st_crop(fields, xmin = -121, xmax = -120, ymin = 55.7, ymax = 56)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
unique(fields$FLDRNM)
## [1] "MICA" "PARKLAND" "DOE" "DAWSON CREEK"
## [5] "GROUNDBIRCH" "SATURN" "SUNSET PRAIRIE" "BRIAR RIDGE"
## [9] "BRASSEY" "TOWER LAKE" "SUNDOWN" "SUNRISE"
ggplot() +
geom_sf(data = wells_dawson, alpha=0.2) +
geom_sf(data = geoph_lines_dawson, alpha=0.2) +
geom_sf(data = pools, aes(colour=FLUID_TYPE)) +
geom_sf(data = fields, alpha=0.1, linetype="dashed") +
scale_color_discrete(name = "Producing pools") +
coord_sf(crs = 4326, xlim = c(-121,-120), ylim = c(55.7, 56)) +
xlab("Longitude") +
ylab("Latitude")